From bada1c2a2ab859b483696d152114259a254480e5 Mon Sep 17 00:00:00 2001
From: Adrian Freund <adrian@freund.io>
Date: Tue, 21 Sep 2021 15:23:14 +0200
Subject: [PATCH 1/3] Added pep2bib.py for generating bibtex entries

---
 .gitignore       |   1 +
 Makefile         |   4 ++
 pep2bib.py       | 131 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   3 ++
 4 files changed, 139 insertions(+)
 create mode 100755 pep2bib.py

diff --git a/.gitignore b/.gitignore
index b9c89215748..f348631f4f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ pep-0000.txt
 pep-0000.rst
 pep-????.html
 peps.rss
+peps.bib
 __pycache__
 *.pyc
 *.pyo
diff --git a/Makefile b/Makefile
index 0f201b0c04a..e5858d29c64 100644
--- a/Makefile
+++ b/Makefile
@@ -27,12 +27,16 @@ pep-0000.rst: $(wildcard pep-????.txt) $(wildcard pep-????.rst) $(wildcard pep0/
 rss:
 	$(PYTHON) pep2rss.py .
 
+bib: pep-0000.rst
+	$(PYTHON) pep2bib.py .
+
 install:
 	echo "Installing is not necessary anymore. It will be done in post-commit."
 
 clean:
 	-rm pep-0000.rst
 	-rm *.html
+	-rm *.bib
 	-rm -rf build
 
 update:
diff --git a/pep2bib.py b/pep2bib.py
new file mode 100755
index 00000000000..6287c0c64b0
--- /dev/null
+++ b/pep2bib.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+
+# usage: python3 pep2bib.py .
+
+import datetime
+import glob
+import os
+import re
+import sys
+import time
+from pybtex.database import Entry, BibliographyData
+
+BIB_PATH = os.path.join(sys.argv[1], 'peps.bib')
+
+
+def firstline_startingwith(full_path, text):
+    result = None
+    for line in open(full_path, encoding="utf-8"):
+        if result is not None:
+            if not line[0].strip():  # Line begins with whitespace
+                result += line
+            else:
+                return result
+        if line.startswith(text):
+            result = line[len(text):].strip()
+    return None
+
+
+def pep_creation_dt(full_path):
+    created_str = firstline_startingwith(full_path, 'Created:')
+    # bleh, I was hoping to avoid re but some PEPs editorialize
+    # on the Created line
+    m = re.search(r'''(\d+-\w+-\d{4})''', created_str)
+    if not m:
+        # some older ones have an empty line, that's okay, if it's old
+        # we ipso facto don't care about it.
+        # "return None" would make the most sense but datetime objects
+        # refuse to compare with that. :-|
+        return datetime.datetime(*time.localtime(0)[:6])
+    created_str = m.group(1)
+    try:
+        t = time.strptime(created_str, '%d-%b-%Y')
+    except ValueError:
+        t = time.strptime(created_str, '%d-%B-%Y')
+    return datetime.datetime(*t[:6])
+
+
+def pep_number(full_path):
+    n_str = full_path.split('-')[-1].split('.')[0]
+    try:
+        n = int(n_str)
+    except ValueError:
+        raise Exception("Can't parse pep number %s" % n_str)
+
+    return n
+
+
+name_first_regex = re.compile('(.*)<.*>')
+mail_first_regex = re.compile('.*\((.*)\)')
+name_only_regex = re.compile('(.*)')
+
+months = {
+    1: 'jan',
+    2: 'feb',
+    3: 'mar',
+    4: 'apr',
+    5: 'may',
+    6: 'jun',
+    7: 'jul',
+    8: 'aug',
+    9: 'sep',
+    10: 'oct',
+    11: 'nov',
+    12: 'dec',
+}
+
+
+def clean_authors(authors_str):
+    authors = authors_str.split(',')
+    cleaned = []
+    for author in authors:
+        match = name_first_regex.match(author)
+        if match is None:
+            match = mail_first_regex.match(author)
+        if match is None:
+            match = name_only_regex.match(author)
+        cleaned.append(match.group(1).strip())
+    return " and ".join(cleaned)
+
+
+def main():
+    # get list of peps with creation time
+    # (from "Created:" string in pep .rst or .txt)
+    peps = glob.glob('pep-*.txt')
+    peps.extend(glob.glob('pep-*.rst'))
+
+    peps_with_dt = [(pep_number(full_path), pep_creation_dt(full_path), full_path) for full_path in peps]
+    # sort peps by date, newest first
+    peps_with_dt.sort()
+
+    # generate rss items for 10 most recent peps
+    items = {}
+    for n, dt, full_path in peps_with_dt:
+        title = firstline_startingwith(full_path, 'Title:')
+        authors = firstline_startingwith(full_path, 'Author:')
+        authors = clean_authors(authors)
+        url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
+        item = Entry('techreport', [
+            ('author', authors),
+            ('title', 'PEP %d: %s' % (n, title)),
+            ('institution', "Python Software Foundation"),
+            ('year', str(dt.year)),
+            ('month', months[dt.month]),
+            ('type', 'PEP'),
+            ('number', str(n)),
+            ('url', url)
+        ])
+        items['pep%d' % n] = item
+
+    bib = BibliographyData(items)
+    bib_str = bib.to_string('bibtex')
+
+    # pybtex always quotes strings, but we want month strings unquoted, so bib styles can replace it
+    bib_str = re.sub('month = "(.*)"', r'month = \1', bib_str)
+
+    with open(BIB_PATH, 'w', encoding="utf-8") as fp:
+        fp.write(bib_str)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
index 837f41b3ef7..66436c1b78b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,6 @@ docutils >= 0.16
 
 # For RSS
 feedgen >= 0.9.0  # For RSS feed
+
+# For bibliography
+pybtex >= 0.24.0
\ No newline at end of file

From aef5fa974d71dd7e4b891d4055ca09ca60e2ecc2 Mon Sep 17 00:00:00 2001
From: Adrian Freund <adrian@freund.io>
Date: Tue, 21 Sep 2021 17:20:09 +0200
Subject: [PATCH 2/3] Pull duplicated code into extra file

---
 pep2bib.py             | 64 ++++++++----------------------------------
 pep2rss.py             | 35 ++++-------------------
 pep_parsing_helpers.py | 56 ++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 83 deletions(-)
 create mode 100644 pep_parsing_helpers.py

diff --git a/pep2bib.py b/pep2bib.py
index 6287c0c64b0..865f05f0a28 100755
--- a/pep2bib.py
+++ b/pep2bib.py
@@ -2,62 +2,21 @@
 
 # usage: python3 pep2bib.py .
 
-import datetime
 import glob
 import os
 import re
 import sys
-import time
 from pybtex.database import Entry, BibliographyData
 
+from pep_parsing_helpers import pep_number, pep_creation_dt, first_line_starting_with, parse_authors
+
 BIB_PATH = os.path.join(sys.argv[1], 'peps.bib')
 
 
-def firstline_startingwith(full_path, text):
-    result = None
-    for line in open(full_path, encoding="utf-8"):
-        if result is not None:
-            if not line[0].strip():  # Line begins with whitespace
-                result += line
-            else:
-                return result
-        if line.startswith(text):
-            result = line[len(text):].strip()
-    return None
-
-
-def pep_creation_dt(full_path):
-    created_str = firstline_startingwith(full_path, 'Created:')
-    # bleh, I was hoping to avoid re but some PEPs editorialize
-    # on the Created line
-    m = re.search(r'''(\d+-\w+-\d{4})''', created_str)
-    if not m:
-        # some older ones have an empty line, that's okay, if it's old
-        # we ipso facto don't care about it.
-        # "return None" would make the most sense but datetime objects
-        # refuse to compare with that. :-|
-        return datetime.datetime(*time.localtime(0)[:6])
-    created_str = m.group(1)
-    try:
-        t = time.strptime(created_str, '%d-%b-%Y')
-    except ValueError:
-        t = time.strptime(created_str, '%d-%B-%Y')
-    return datetime.datetime(*t[:6])
-
-
-def pep_number(full_path):
-    n_str = full_path.split('-')[-1].split('.')[0]
-    try:
-        n = int(n_str)
-    except ValueError:
-        raise Exception("Can't parse pep number %s" % n_str)
-
-    return n
-
-
-name_first_regex = re.compile('(.*)<.*>')
-mail_first_regex = re.compile('.*\((.*)\)')
-name_only_regex = re.compile('(.*)')
+name_first_regex = re.compile(r'(.*)<.*>')
+mail_first_regex = re.compile(r'.*\((.*)\)')
+name_only_regex = re.compile(r'(.*)')
+
 
 months = {
     1: 'jan',
@@ -75,8 +34,7 @@ def pep_number(full_path):
 }
 
 
-def clean_authors(authors_str):
-    authors = authors_str.split(',')
+def authors_to_bib(authors):
     cleaned = []
     for author in authors:
         match = name_first_regex.match(author)
@@ -98,12 +56,12 @@ def main():
     # sort peps by date, newest first
     peps_with_dt.sort()
 
-    # generate rss items for 10 most recent peps
     items = {}
     for n, dt, full_path in peps_with_dt:
-        title = firstline_startingwith(full_path, 'Title:')
-        authors = firstline_startingwith(full_path, 'Author:')
-        authors = clean_authors(authors)
+        title = first_line_starting_with(full_path, 'Title:')
+        author_string = first_line_starting_with(full_path, 'Author:')
+        authors = parse_authors(author_string)
+        authors = authors_to_bib(authors)
         url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
         item = Entry('techreport', [
             ('author', authors),
diff --git a/pep2rss.py b/pep2rss.py
index 52b532f51d6..9337fe29f8d 100755
--- a/pep2rss.py
+++ b/pep2rss.py
@@ -5,15 +5,15 @@
 import datetime
 import glob
 import os
-import re
 import sys
-import time
 import PyRSS2Gen as rssgen
 import docutils.frontend
 import docutils.nodes
 import docutils.parsers.rst
 import docutils.utils
 
+from pep_parsing_helpers import pep_creation_dt, first_line_starting_with, parse_authors
+
 RSS_PATH = os.path.join(sys.argv[1], 'peps.rss')
 
 
@@ -53,38 +53,12 @@ def pep_abstract(full_path: str) -> str:
     return abstract
 
 
-def firstline_startingwith(full_path, text):
-    for line in open(full_path, encoding="utf-8"):
-        if line.startswith(text):
-            return line[len(text):].strip()
-    return None
-
-
 # get list of peps with creation time
 # (from "Created:" string in pep .rst or .txt)
 peps = glob.glob('pep-*.txt')
 peps.extend(glob.glob('pep-*.rst'))
 
 
-def pep_creation_dt(full_path):
-    created_str = firstline_startingwith(full_path, 'Created:')
-    # bleh, I was hoping to avoid re but some PEPs editorialize
-    # on the Created line
-    m = re.search(r'''(\d+-\w+-\d{4})''', created_str)
-    if not m:
-        # some older ones have an empty line, that's okay, if it's old
-        # we ipso facto don't care about it.
-        # "return None" would make the most sense but datetime objects
-        # refuse to compare with that. :-|
-        return datetime.datetime(*time.localtime(0)[:6])
-    created_str = m.group(1)
-    try:
-        t = time.strptime(created_str, '%d-%b-%Y')
-    except ValueError:
-        t = time.strptime(created_str, '%d-%B-%Y')
-    return datetime.datetime(*t[:6])
-
-
 peps_with_dt = [(pep_creation_dt(full_path), full_path) for full_path in peps]
 # sort peps by date, newest first
 peps_with_dt.sort(reverse=True)
@@ -96,8 +70,9 @@ def pep_creation_dt(full_path):
         n = int(full_path.split('-')[-1].split('.')[0])
     except ValueError:
         pass
-    title = firstline_startingwith(full_path, 'Title:')
-    author = firstline_startingwith(full_path, 'Author:')
+    title = first_line_starting_with(full_path, 'Title:')
+    authors = first_line_starting_with(full_path, 'Author:')
+    author = parse_authors(authors)[0] # RSS only supports one author
     abstract = pep_abstract(full_path)
     url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
     item = rssgen.RSSItem(
diff --git a/pep_parsing_helpers.py b/pep_parsing_helpers.py
new file mode 100644
index 00000000000..e94159afb8a
--- /dev/null
+++ b/pep_parsing_helpers.py
@@ -0,0 +1,56 @@
+import re
+import datetime
+import time
+
+
+def first_line_starting_with(full_path, text):
+    """Return the value of the first header starting with *text*, or None."""
+    with open(full_path, encoding="utf-8") as lines:  # closed on exit; iterates once
+        matches = (line[len(text):].strip() for line in lines if line.startswith(text))
+        result = next(matches, None)
+        # The file iterator resumes right after the match (exhausted if none).
+        for line in lines:
+            if line[0].strip():  # a non-indented line ends the field
+                break
+            result += line  # whitespace-led (or blank) line continues the field
+    return result  # reached at EOF too, so a trailing field is still returned
+
+
+def pep_creation_dt(full_path):
+    """Return the PEP's ``Created:`` date as a naive datetime.
+
+    Falls back to the local-time Unix epoch when the header is missing or
+    holds no ``D-Month-YYYY`` style date, so results always sort together.
+    """
+    created_str = first_line_starting_with(full_path, 'Created:')
+    # Search, don't parse: PEPs may annotate the line; None means no header.
+    m = re.search(r'''(\d+-\w+-\d{4})''', created_str or '')
+    if not m:
+        return datetime.datetime(*time.localtime(0)[:6])
+    created_str = m.group(1)
+    try:
+        t = time.strptime(created_str, '%d-%b-%Y')  # abbreviated month name
+    except ValueError:
+        t = time.strptime(created_str, '%d-%B-%Y')  # full month name
+    return datetime.datetime(*t[:6])
+
+
+def pep_number(full_path):
+    """Return the integer PEP number from a path such as ``pep-0008.txt``."""
+    # The digits sit between the last '-' and the first '.' after it.
+    n_str = full_path.rpartition('-')[2].partition('.')[0]
+    try:
+        return int(n_str)
+    except ValueError:
+        raise Exception("Can't parse pep number %s" % n_str)
+
+
+def parse_authors(authors_str):
+    """Split a comma-separated ``Author:`` value into a list of entries.
+
+    Each piece is stripped of surrounding whitespace; empty pieces are kept.
+    """
+    pieces = authors_str.split(',')
+    return [piece.strip() for piece in pieces]
+
+

From a43c159c4a0f07d15adc43922261845f3abc3a58 Mon Sep 17 00:00:00 2001
From: Adrian Freund <adrian@freund.io>
Date: Tue, 21 Sep 2021 17:22:21 +0200
Subject: [PATCH 3/3] Fixed outdated comment

---
 pep2bib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pep2bib.py b/pep2bib.py
index 865f05f0a28..00150cc1738 100755
--- a/pep2bib.py
+++ b/pep2bib.py
@@ -53,7 +53,7 @@ def main():
     peps.extend(glob.glob('pep-*.rst'))
 
     peps_with_dt = [(pep_number(full_path), pep_creation_dt(full_path), full_path) for full_path in peps]
-    # sort peps by date, newest first
+    # sort peps by number
     peps_with_dt.sort()
 
     items = {}