index.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. #!/usr/bin/python3
  2. import mysql.connector
  3. import requests
  4. from bs4 import BeautifulSoup
  5. import urllib.parse
  6. import re
  7. from sys import exit as exit
  8. import json
  9. import datetime
  10. import custom_email
  11. from tabulate import tabulate
  12. from configparser import ConfigParser
  13. from os import path
  14. import logging
  15. ### TO DO ###
  16. #
  17. # Print useful reports (land only, house and land, etc)
  18. # Check if db entries no longer appear online (mark expired)
  19. # When checking online from various sites, check if address already exists in db
  20. # - if so, warn user and do not add
  21. # Add date_added to initial entries
  22. # Check results against database for changes
  23. # - update and add/change date_modified
  24. # Add argument to run update query when results.py is calles
  25. # Add database column to hold parcel number. Make links to GIS servers
  26. #
  27. # IDENTIFY NEW PROPERTIES!!
  28. #
  29. # Automate db opening and closing when calling dbinsert()
  30. #
  31. #############
  32. class Property:
  33. """Description of a proerty"""
  34. def __init__ (self, site_name, type, MLS, address, city, st, zip, \
  35. county, price, acres, title='', sqft=0, bedrooms=0, baths=0, description='', link=''):
  36. self.site_name = site_name
  37. self.type = type
  38. self.MLS = MLS
  39. self.title = title
  40. self.sqft = sqft
  41. self.bedrooms = bedrooms
  42. self.baths = baths
  43. self.address = address
  44. self.city = city
  45. self.st = st
  46. self.zip = zip
  47. self.county = county
  48. self.price = price
  49. self.acres = acres
  50. self.description = description
  51. self.link = link
  52. class Parameters:
  53. '''Parameters taken from config file'''
  54. def __init__(self, file='landsearch.conf'):
  55. self.file = file
  56. if not path.exists(self.file):
  57. raise FileNotFoundError("The config file cannot be opened", self.file)
  58. try:
  59. self.config = ConfigParser()
  60. self.config.read(self.file)
  61. self.search_params = self.config['Search']
  62. self.log_params = self.config['Logging']
  63. except Exception as err:
  64. print(err, "Using default search Parameters")
  65. class Search:
  66. '''Universal Search Criteria'''
  67. def checktype(self, attribute):
  68. '''Fixes string None in config file and converts to '' '''
  69. if not attribute == 'None':
  70. return attribute
  71. else:
  72. return ''
  73. def __init__(self, file = 'landsearch.conf'):
  74. # self.file = file
  75. # if not path.exists(self.file):
  76. # raise FileNotFoundError("The config file cannot be opened", self.file)
  77. # try:
  78. # config = ConfigParser()
  79. # config.read(self.file)
  80. # search_params = config['Search']
  81. # log_params = config['Logging']
  82. # except FileNotFoundError as err:
  83. # print(err, "Using default search parameters.")
  84. # except Exception as err:
  85. # print(err, "Using default search parameters.")
  86. params = Parameters()
  87. search_params = params.search_params
  88. log_params = params.log_params
  89. logging.basicConfig(filename=log_params.get('log_file'), \
  90. level=int(log_params.get('logging_level', 30)), \
  91. format='%(asctime)s %(levelname)-8s %(message)s', \
  92. datefmt='%Y-%m-%d %H:%M:%S') ## Default log level WARNING (30)
  93. logging.getLogger("urllib3").setLevel(logging.WARNING) ## Supress Requests method logging
  94. logging.debug("Log level set to %s", logging.root.level)
  95. county = search_params.get('county', ['Gwinnett', 'Hall', 'Jackson', 'Walton', 'Barrow'])
  96. if isinstance(county, str):
  97. county = county.split(", ")
  98. type = search_params.get('type', ['farm', 'house', 'land'])
  99. if isinstance(type, str):
  100. type = type.split(", ")
  101. self.types=['land', 'farm', 'home', 'house']
  102. self.county = county
  103. self.lower_price = self.checktype(search_params.get('lower_price', 0))
  104. self.upper_price = self.checktype(search_params.get('upper_price', 525000))
  105. self.lower_acres = self.checktype(search_params.get('lower_acres', 5))
  106. self.upper_acres = self.checktype(search_params.get('upper_acres', 15))
  107. self.type = type ##accept list!
  108. self.lower_sqft = self.checktype(search_params.get('lower_sqft', ''))
  109. self.upper_sqft = self.checktype(search_params.get('upper_sqft', ''))
  110. self.lower_bedrooms = self.checktype(search_params.get('lower_bedrooms', ''))
  111. self.upper_bedrooms = self.checktype(search_params.get('upper_bedrooms', ''))
  112. for property_type in self.type:
  113. assert property_type in self.types, ("Unknown type '" + property_type + "'. Property Type must be of type: " + str(self.types))
  114. ## FOR TESTING, PRINT ALL ATTRIBUTES OF SEARCH ##
  115. logging.debug(vars(self))
  116. class ImproperSearchError(Exception):
  117. def __init__ (self, search, message="Improper Search. Must use instance of Search class"):
  118. self.search = search
  119. self.message = message
  120. super().__init__(self.message)
  121. class MLSDATA:
  122. """Fetches and stores MLS Data
  123. Currently only supports GeorgiaMLS.com (GMLS)"""
  124. counties=['Gwinnett', 'Barrow', 'Hall', 'Jackson', 'Walton']
  125. GoogleAPIKey = 'AIzaSyAXAnpBtjv760W8YIPqKZ0dFXpwAaZN7Es'
  126. live_google = True
  127. def __init__ (self, mlstype):
  128. self.help = "This is a class that will retrieve MLS data from various sources, store the info in a database, and run queries on the data."
  129. self.mlstype = mlstype.lower() ## Determines what kind of data is to be retreieve (gmls, Zillow, etc)
  130. self.cursor = ''
  131. self.cnx = ''
  132. self.new_listings = []
  133. self.email = Parameters().search_params['email']
  134. print("EMAIL: " + self.email)
  135. def stringbuilder(self, search: Search, county):
  136. """ Takes Search class and build appropriate URL query based on mlstype. Currently only supports gmls."""
  137. if self.mlstype == 'gmls':
  138. base_addr = 'https://www.georgiamls.com/real-estate/search-action.cfm?'
  139. params = [('cnty', county), \
  140. ('lpl', search.lower_price), ('lph', search.upper_price), \
  141. ('acresL', search.lower_acres), ('acresH', search.upper_acres), \
  142. ('sqftl', search.lower_sqft), ('sqfth', search.upper_sqft), \
  143. ('orderBy', 'b'), \
  144. ('scat', '1'), \
  145. ('sdsp', 'g')]
  146. for type in search.type:
  147. if 'land' in type.lower():
  148. params.append(('typ', 'll'))
  149. if 'farm' in type.lower():
  150. params.append(('typ', 'af'))
  151. if 'home' in type.lower():
  152. params.append(('typ', 'sd'))
  153. if 'house' in type.lower():
  154. params.append(('typ', 'sd'))
  155. search_string = base_addr + urllib.parse.urlencode(params)
  156. print(search_string)
  157. return search_string
  158. def break_address(self, address):
  159. """Takes an address string in the form 'street address|city, state zip' and returns a list"""
  160. street = address[:address.find('|')]
  161. csz = address[address.find('|')+1:]
  162. city = csz[:csz.find(',')]
  163. st = csz[csz.find(',')+1:].split(' ')[1]
  164. zip = csz[csz.find(',')+1:].split(' ')[2]
  165. split_address = [street, city, st, zip]
  166. return split_address
  167. def gmlsparser(self, URL, county, pages=''):
  168. """ Retrieve the website for georgiamls.com and returns a list of Property objects.
  169. UNIQUE TO GEORGIAMLS.COM ONLY!!"""
  170. properties_list = []
  171. r = requests.get(URL)
  172. soup = BeautifulSoup(r.content, 'html5lib')
  173. if pages == '':
  174. try:
  175. pages = soup.find("div", {'class':'small listing-pagination-count'}).getText().strip().split(" ")[-1]
  176. current_page = soup.find("div", {'class':'small listing-pagination-count'}).getText().strip().split(" ")[-3]
  177. except AttributeError as err:
  178. print("No Results Found.")
  179. return
  180. else:
  181. print('pages already set to: ' + str(pages))
  182. for page in range(0, int(pages)):
  183. print('Processing Page: ' + str(page + 1) + ' of ' + str(pages))
  184. if not page == 0:
  185. next_URL = URL + '&start=' + str(((12*page)+1))
  186. soup = BeautifulSoup(requests.get(next_URL).content, 'html5lib')
  187. raw_listings = soup.findAll("div", {'class':'col-xs-12 col-sm-6 col-lg-4 text-center listing-gallery'})
  188. for listing in raw_listings:
  189. items = listing.findAll("p") ##
  190. site_name = self.mlstype
  191. MLS = " ".join(items[3].getText().strip()[6:15].split()) ## MLS NUMBER
  192. title = '' ## Listing Title (address if no title)
  193. price = items[0].string.strip() ## Price
  194. if self.mlstype == 'gmls':
  195. link = 'https://www.georgiamls.com' + listing.a['href']
  196. detail_request = requests.get(link)
  197. detail_soup = BeautifulSoup(detail_request.content, 'html5lib')
  198. details = detail_soup.findAll('tr')
  199. bedbath = details[1].findAll('td')[1].getText().strip().split('/')
  200. br = bedbath[0][:-3]
  201. ba = bedbath[1][:-3]
  202. baths = ba ## IF House is present
  203. bedrooms = br ## IF House is present
  204. address = ''
  205. for element in details:
  206. if 'sqft' in element.getText():
  207. sqft = element.findAll('td')[1].getText().strip()[:-5].replace(',','')
  208. if 'lot size' in element.getText().lower():
  209. acres = element.findAll('td')[1].getText().strip()[:-6]
  210. if 'Property Type' in element.getText():
  211. ptype = element.findAll('td')[1].getText().strip()
  212. if 'acreage' in ptype.lower():
  213. type = 'af'
  214. elif 'land lot' in ptype.lower():
  215. type = 'll'
  216. elif 'single family home' in ptype.lower():
  217. type = 'sf'
  218. else:
  219. type = 'unknown'
  220. if 'Address' in element.getText():
  221. if not address: #Prevents finding the word 'address' elsewhere in the listings
  222. address = element.findAll('td')[1]
  223. #7 print("TEST ADDRESS: ", element)
  224. street_address = list(address)[0].strip()
  225. csz = list(address)[2].strip()
  226. split_address = self.break_address(street_address + '|' + csz)
  227. description = detail_soup.find('div', {'id':'listing-remarks'}).getText().strip().replace('\t','')
  228. data = Property(site_name = self.mlstype, \
  229. type = type, \
  230. MLS = MLS, \
  231. bedrooms = bedrooms, \
  232. baths = baths, \
  233. sqft = sqft, \
  234. address = split_address[0], \
  235. city = split_address[1].title(), \
  236. st = split_address[2].upper(), \
  237. zip = split_address[3], \
  238. county = county.title(), \
  239. price = price.replace('$','').replace(',',''), \
  240. acres = acres, \
  241. description = description, \
  242. link = link)
  243. properties_list.append(data)
  244. print('Scanned: ' + data.address)
  245. return properties_list
  246. def getmlsdata(self, search: Search, county):
  247. """This is the main entrypoint. Takes arguments to pass to stringbuilder to create the URL.
  248. Selects appropriate parser based on self.mlstype from class intance.
  249. Needs any modifications from the standard search ($0 to $500,000, 5 to 15 acres, etc)
  250. See class search for more information.
  251. --> 9/1/20 - takes Search class as argument. All properties are handled by the class <--"""
  252. if isinstance(search, Search):
  253. if not county in self.counties: ### FIX for lower()
  254. print("County " + county + " not regognized. Exiting")
  255. else:
  256. print("Scanning for results in " + county + " using the " + self.mlstype.upper() + " database.")
  257. if self.mlstype == 'gmls':
  258. list = self.gmlsparser(self.stringbuilder(search, county), county)
  259. return list
  260. else:
  261. raise ImproperSearchError(search)
  262. def checkdb(self, criteria_dict):
  263. """Check dictionary of critera against database.
  264. Currently accepts keys: MLS, title, address (street number/name, not city/state/zip).
  265. Returns True if records exists."""
  266. if self.cursor: ## Check if DB is connected
  267. for criteria in criteria_dict:
  268. ## Determine criteria passed, and execute queries for each
  269. if criteria == 'MLS':
  270. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE MLS = %(MLS)s GROUP BY id", {criteria:criteria_dict[criteria]})
  271. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  272. elif criteria == 'title':
  273. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE title = %(title)s GROUP BY id", {criteria:criteria_dict[criteria]})
  274. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  275. elif criteria == 'address':
  276. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE address = %(address)s GROUP BY id", {criteria:criteria_dict[criteria]})
  277. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  278. else:
  279. print("Cannot search on parameter: " + criteria)
  280. return self.cursor.rowcount
  281. else:
  282. print("Database is not connected or cursor not filled. Use function 'connectdb()' to establish")
  283. def getGoogle(self, property):
  284. """Supplies date from Google Distance Matrix API to populate
  285. distance_to_work
  286. time_to_work
  287. distance_to_school
  288. time_to_school
  289. Costs money, so it should only be called when inserting a new db record.
  290. Returns distance in METERS (1m = 0.000621371 mi) and time in SECONDS
  291. returns fully populated Propery object."""
  292. print("Fetching live Google Data. $$")
  293. destination1 = 'Hebron Christian Acadamy' ## Working query for Hebron Christian Acadamy
  294. destination2 = 'JHRJ+FJ Atlanta, Georgia' ## Plus code for Hourly parking at Int'l Terminal, KATL
  295. params = {}
  296. params['units'] = 'imperial'
  297. params['origins'] = property.address + ', ' + property.city + ' ' + property.st
  298. params['destinations'] = 'Hebron Christian Acadamy|JHRJ+FJ Atlanta, Georgia'
  299. params['key'] = self.GoogleAPIKey
  300. baseURL = 'https://maps.googleapis.com/maps/api/distancematrix/json?'
  301. API_URL = baseURL + urllib.parse.urlencode(params)
  302. # print(API_URL)
  303. # Send Request and capture result as json
  304. try:
  305. google_result = requests.get(API_URL).json()
  306. if google_result['status'] == 'OK':
  307. property.distance_to_school = google_result['rows'][0]['elements'][0]['distance']['value']
  308. property.time_to_school = google_result['rows'][0]['elements'][0]['duration']['value']
  309. property.distance_to_work = google_result['rows'][0]['elements'][1]['distance']['value']
  310. property.time_to_work = google_result['rows'][0]['elements'][1]['duration']['value']
  311. except:
  312. print("ERROR: Failed to obtain Google API data")
  313. def insertrecord(self, property, work_address=None, school_address=None):
  314. """Inserts record into database. Takes argument Property class object.
  315. FUTURE - add date_added field to insert operation."""
  316. if self.cursor:
  317. criteria_dict = property.__dict__
  318. criteria_dict['Date_Added'] = str(datetime.date.today())
  319. placeholder_columns = ", ".join(criteria_dict.keys())
  320. placeholder_values = ", ".join([":{0}".format(col) for col in criteria_dict.keys()])
  321. qry = "INSERT INTO properties ({placeholder_columns}) VALUES {placeholder_values}".format(placeholder_columns=placeholder_columns, placeholder_values=tuple(criteria_dict.values()))
  322. self.cursor.execute(qry)
  323. self.cnx.commit()
  324. print("Inserted " + criteria_dict['MLS'] + " | " + criteria_dict['address'] + " into database.")
  325. else:
  326. print("Database is not connected or cursor not filled. Use function 'connectdb()' to establish")
  327. def connectdb(self, host='192.168.100.26', user='landsearchuser', password='1234', database='landsearch'):
  328. """Connects to database and returns a cursor object"""
  329. self.cnx = mysql.connector.connect(host=host, user=user, password=password, database=database, buffered=True)
  330. self.cursor = self.cnx.cursor()
  331. return self.cursor
  332. def closedb(self):
  333. """Cleanly close the db."""
  334. self.cursor.close()
  335. self.cnx.close()
  336. def dbinsert(self, properties: list):
  337. """Inserts records into database. Takes list of Property class objects"""
  338. if not properties == None:
  339. if not isinstance(properties, list):
  340. raise TypeError('type list required')
  341. for property in properties:
  342. if not self.checkdb({'MLS': property.MLS, 'address': property.address}):
  343. if self.live_google: self.getGoogle(property) ## <- This will populate distance and time fields if set TRUE
  344. self.insertrecord(property)
  345. self.new_listings.append(property)
  346. else:
  347. print(property.MLS + ' | ' + property.address + ' is already in db. Not inserted.')
  348. ##REMOVE FOR TESTING###
  349. # self.new_listings.append(property)
  350. #######################
  351. else:
  352. print("Empty dataset. No records to insert.")
  353. def alerts(self):
  354. pass
  355. def email_results(self):
  356. pass
  357. if self.email:
  358. logging.debug("email_results" + str(self.email))
  359. body = ''
  360. data = []
  361. subj = "New Real Estate Listings for " + str(datetime.date.today())
  362. for listing in self.new_listings:
  363. row = []
  364. body += listing.MLS + " | " + listing.address + " | " + listing.acres + " | " + listing.price + " | " + listing.link + "\n"
  365. row.append(listing.MLS)
  366. row.append(listing.address)
  367. row.append('{:0,.2f}'.format(float(listing.acres)))
  368. row.append(listing.sqft)
  369. row.append('${:0,.0f}'.format(int(listing.price)))
  370. row.append(listing.time_to_school/60 if hasattr(listing, 'time_to_school') else 'NA')
  371. row.append(listing.link)
  372. data.append(row)
  373. body = """\
  374. Daily Real Estate Search Report\n
  375. The following properties have been found which may be of interest.\n
  376. """
  377. results = tabulate(data, headers=['MLS', 'Address', 'Acres', 'sqft', 'Price', 'Time to School', 'link'])
  378. body += results
  379. sendto = ['stagl.mike@gmail.com', 'M_Stagl@hotmail.com']
  380. mymail = custom_email.simplemail(subj, body, sendto)
  381. if len(self.new_listings) > 0:
  382. try:
  383. mymail.sendmail()
  384. except Exception as e:
  385. print("Error sending email. " + e)
  386. logging.warning("Error sending email. " + e)
  387. else:
  388. print("No new listings. Email not sent")
  389. logging.info("No new listings. Email not sent")
  390. else:
  391. print("Suppressing email based on landsearch.conf preferences.")
  392. logging.warning("Suppressing email based on landsearch.conf preferences.")
  393. if __name__ == '__main__':
  394. gmls = MLSDATA('GMLS') # Create MLSDATA object
  395. mysearch = Search() # Create a custom search object
  396. myresults = []
  397. ## Create function in MLSDATA module:
  398. # - takes counties from configparser and calls getmlsdata for each county.
  399. # - Compiles results into single list and returns that list
  400. # - User code would look something like this:
  401. # _ mysearch = Search()
  402. # _ mydata = gmls.findalllistings(mysearch) # This would control the looping of counties and return a list like normal
  403. # _ gmls.dbinsert(myresults) # This would automate db opening and closing
  404. for county in mysearch.county:
  405. print("local search: ", county)
  406. mysearch = Search() ## Search used to take county as parameter, so this loop would work. Now Search class contains list. loop must occur in getmlsdata module
  407. mydata = gmls.getmlsdata(mysearch, county)
  408. if mydata: #Avoids a crash is there is no data
  409. for listing in mydata:
  410. myresults.append(listing)
  411. # print(len(myresults))
  412. # print(myresults[0].address)
  413. gmls.connectdb()
  414. gmls.dbinsert(myresults)
  415. gmls.closedb()
  416. gmls.email_results()