Source code for findnationality
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os, sys
import requests
from bs4 import BeautifulSoup
# Finds the nationality of each judoka fighting in the
# 1. Oesterreichischen Judo Bundesliga from https://www.judoinside.com
class Nationality:
    """
    Looks up the nationality of every judoka of a season and writes the
    result to one text file per year.

    A hand-maintained correction list takes precedence; all remaining names
    are searched on https://www.judoinside.com.
    """

    def __init__(self):
        """
        creates a Nationality object
        """
        pass

    @staticmethod
    def _unescape(text):
        """
        Resolves backslash escape sequences contained in ``text``.

        :param text: one field of the hand-written correction list
        :return: the text with its escape sequences replaced by the
            characters they stand for; the unchanged text if it cannot be
            decoded
        """
        if hasattr(text, 'decode'):
            # Python 2 byte string: keep the codec the original code used.
            return text.decode('string-escape')
        # Python 3 str has no decode(). Round-trip through bytes so that
        # escaped UTF-8 bytes (a backslash, 'x' and two hex digits per byte)
        # and literal non-ASCII characters both end up as proper text.
        try:
            return (text.encode('utf-8').decode('unicode_escape')
                    .encode('latin-1').decode('utf-8'))
        except (UnicodeDecodeError, UnicodeEncodeError):
            return text

    @staticmethod
    def _readLines(path):
        """Returns the lines of the text file ``path`` without line endings."""
        with open(path) as handle:
            return handle.read().splitlines()

    def _loadManualNations(self, path):
        """
        Reads the hand-written ``name; nationality`` list stored in ``path``.

        :return: dict mapping judoka name to nationality
        """
        nameCountry = dict()
        for line in self._readLines(path):
            nameNation = line.split("; ")
            if len(nameNation) < 2:
                # blank or malformed line: nothing to map
                continue
            nameCountry[self._unescape(nameNation[0])] = self._unescape(nameNation[1])
        return nameCountry

    @staticmethod
    def _searchNation(url, name):
        """
        Searches ``name`` on judoinside and extracts the country entry.

        :return: the text of the first list item of the judoka's profile box
            with the ``Country:`` label markup removed, or None when the
            result page contains no such box or the box has no list items
        """
        # TODO (carried over from the original code): the lookup was marked
        # as "does not work correctly"; the cause is not documented.
        # NOTE: an earlier, disabled attempt lower-cased the name and
        # transliterated umlauts (ae, oe, ue, ss) before searching.
        # Passing the name via ``params`` lets requests URL-encode blanks and
        # special characters instead of concatenating the raw name.
        r = requests.get(url, params={"q": name})
        soupFightsOverview = BeautifulSoup(r.content, 'html.parser')
        judoka = soupFightsOverview.find_all(id="judokaUserDatas")
        if not judoka:
            return None
        judokaDetails = judoka[0].find_all("li")
        if not judokaDetails:
            return None
        return str(judokaDetails[0]).replace("<li><span>Country:</span>", "").replace("</li>", "")

    def runFindNationality(self, startYear=2011, endYear=2017,
                           placeToStoreResults='../results/',
                           placeOfFalseClassifications='../falseClassification/'):
        """
        finds the nationality of each judoka, fighting in the 1. Oesterreichischen Judo Bundesliga
        from https://www.judoinside.com and stores the data collected in a .txt file

        For every year the names are read from
        ``<placeToStoreResults><year>/kaempfer<year>.txt`` and the result is
        written to ``<placeToStoreResults><year>/nationalities<year>.txt``
        (ATTENTION: an existing file is overwritten). Names listed in
        ``<placeOfFalseClassifications>falseClassification<year>.txt`` are
        taken from that list and are not searched online. Names that cannot
        be resolved are stored with the nationality ``MissingCo``.

        :param startYear: first year to process
        :param endYear: year at which processing stops (exclusive)
        :param placeToStoreResults: directory prefix of the per-year folders,
            including the trailing separator
        :param placeOfFalseClassifications: directory prefix of the
            hand-written correction lists, including the trailing separator
        """
        url = "https://www.judoinside.com/site/search"
        for year in range(startYear, endYear):
            print("current year")
            print(year)
            yearDir = placeToStoreResults + str(year)
            # Read both inputs first so that a missing input file does not
            # leave behind a truncated result file.
            names = self._readLines(yearDir + "/kaempfer" + str(year) + ".txt")
            nameCountry = self._loadManualNations(
                placeOfFalseClassifications + "falseClassification" + str(year) + ".txt")
            nameNations = yearDir + '/nationalities' + str(year) + '.txt'
            with open(nameNations, 'w') as fileBegegnungen:
                fileBegegnungen.write('name; nationality\n')
                for name in names:
                    if name in nameCountry:
                        # Manual entries win, so no request is needed.
                        print("Name from List " + str(name) + "; " + str(nameCountry.get(name)))
                        line = str(name) + "; " + str(nameCountry.get(name))
                    else:
                        nation = self._searchNation(url, name)
                        if nation is None:
                            nation = "MissingCo"
                        line = name + "; " + nation
                        print(line)
                    fileBegegnungen.write(line + "\n")
# Script entry point: build the collector and run it with its default settings.
if __name__ == "__main__":
    Nationality().runFindNationality()