1
2 """
3 Pure Python GeoIP API
4
5 The API is based on MaxMind's C-based Python API, but the code itself is
6 ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid.
7
8 @author: Jennifer Ennis <zaylea@gmail.com>
9
10 @license: Copyright(C) 2004 MaxMind LLC
11
12 This program is free software: you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU Lesser General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
24 """
25
26 import os
27 import math
28 import socket
29 import mmap
30 import codecs
31 from threading import Lock
32
33 try:
34 from StringIO import StringIO
35 except ImportError:
36 from io import StringIO, BytesIO
37
38 import pygeoip.const
39 from pygeoip import util
40 from pygeoip.const import PY2, PY3
41 from pygeoip.timezone import time_zone_by_country_and_region
42
43
# Aliases of the pygeoip.const database access flags (see the ``flags``
# parameter of the constructor).
STANDARD = const.STANDARD
MMAP_CACHE = const.MMAP_CACHE
MEMORY_CACHE = const.MEMORY_CACHE

# Codec name used to decode bytes read from the database file.
ENCODING = const.ENCODING
50
53
54
76
77
# Create the base class by calling the metaclass directly with
# (name, bases, namespace).
# NOTE(review): GeoIPMetaclass is not defined in the visible part of this
# listing -- confirm its definition precedes this line in the real file.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
79
80
def __init__(self, filename, flags=0):
    """
    Open a GeoIP database and prepare it for lookups.

    @param filename: Path to a geoip database.
    @type filename: str
    @param flags: Flags that affect how the database is processed.
        Currently supported flags are STANDARD (the default),
        MEMORY_CACHE (preload the whole file into memory) and
        MMAP_CACHE (access the file via mmap).
    @type flags: int
    """
    self._filename = filename
    self._flags = flags

    if self._flags & const.MMAP_CACHE:
        # The file object is only needed while the map is created; the
        # context manager closes it even if mmap() raises.
        with open(filename, 'rb') as f:
            self._filehandle = mmap.mmap(
                f.fileno(), 0, access=mmap.ACCESS_READ)

    elif self._flags & const.MEMORY_CACHE:
        with open(filename, 'rb') as f:
            self._memoryBuffer = f.read()
        # The raw buffer is kept for direct slicing; the file-like wrapper
        # serves the code paths that seek and read.
        iohandle = BytesIO if PY3 else StringIO
        self._filehandle = iohandle(self._memoryBuffer)

    else:
        self._filehandle = codecs.open(filename, 'rb', ENCODING)

    self._lock = Lock()
    self._setup_segments()
115
def _setup_segments(self):
    """
    Parses the database file to determine what kind of database is
    being used and setup segment sizes and start points that will
    be used by the seek*() methods later.

    Supported databases:

    * COUNTRY_EDITION
    * COUNTRY_EDITION_V6
    * REGION_EDITION_REV0
    * REGION_EDITION_REV1
    * CITY_EDITION_REV0
    * CITY_EDITION_REV1
    * ORG_EDITION
    * ISP_EDITION
    * ASNUM_EDITION

    The file position found on entry is restored before returning.
    """
    # Defaults, kept when no structure-info marker is found.
    self._databaseType = const.COUNTRY_EDITION
    self._recordLength = const.STANDARD_RECORD_LENGTH
    self._databaseSegments = const.COUNTRY_BEGIN

    # The marker is three 0xFF characters; build it once, in the same text
    # type that the reads below are converted to.
    marker = chr(255) * 3
    if PY2:
        marker = marker.decode(ENCODING)

    segment_editions = (const.CITY_EDITION_REV0,
                        const.CITY_EDITION_REV1,
                        const.ORG_EDITION,
                        const.ISP_EDITION,
                        const.ASNUM_EDITION)
    long_record_editions = (const.ORG_EDITION, const.ISP_EDITION)

    # A context manager guarantees the lock is released even when one of
    # the seeks or reads raises.
    with self._lock:
        filepos = self._filehandle.tell()
        self._filehandle.seek(-3, os.SEEK_END)

        for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
            delim = self._filehandle.read(3)

            if PY3 and type(delim) is bytes:
                delim = delim.decode(ENCODING)
            if PY2 and type(delim) is str:
                delim = delim.decode(ENCODING)

            if delim != marker:
                # Three characters were just read; going back four moves
                # the window one position towards the start of the file.
                self._filehandle.seek(-4, os.SEEK_CUR)
                continue

            self._databaseType = ord(self._filehandle.read(1))

            # NOTE(review): type codes of 106 and above are shifted down
            # by 105; presumably a legacy numbering -- confirm.
            if self._databaseType >= 106:
                self._databaseType -= 105

            if self._databaseType == const.REGION_EDITION_REV0:
                self._databaseSegments = const.STATE_BEGIN_REV0

            elif self._databaseType == const.REGION_EDITION_REV1:
                self._databaseSegments = const.STATE_BEGIN_REV1

            elif self._databaseType in segment_editions:
                buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH)

                if PY3 and type(buf) is bytes:
                    buf = buf.decode(ENCODING)

                # The segment count is stored least significant byte first.
                self._databaseSegments = sum(
                    ord(buf[j]) << (j * 8)
                    for j in range(const.SEGMENT_RECORD_LENGTH))

                if self._databaseType in long_record_editions:
                    self._recordLength = const.ORG_RECORD_LENGTH
            break

        self._filehandle.seek(filepos, os.SEEK_SET)
192
def _seek_country(self, ipnum):
    """
    Using the record length and appropriate start points, seek to the
    country that corresponds to the converted IP address integer.

    @param ipnum: result of ip2long conversion
    @type ipnum: int
    @return: offset of start of record
    @rtype: int
    @raise GeoIPError: if no value at or beyond the segment count is
        reached after all address bits have been consumed
    """
    offset = 0
    record_length = self._recordLength
    node_size = 2 * record_length  # each node holds two records

    # A number with more than 10 decimal digits cannot fit in 32 bits, so
    # it is walked as a 128-bit address.
    seek_depth = 127 if len(str(ipnum)) > 10 else 31

    for depth in range(seek_depth, -1, -1):
        start = node_size * offset
        if self._flags & const.MEMORY_CACHE:
            buf = self._memoryBuffer[start:start + node_size]
        else:
            # The context manager releases the lock even if the seek or
            # read raises.
            with self._lock:
                self._filehandle.seek(start, os.SEEK_SET)
                buf = self._filehandle.read(node_size)

        if PY3 and type(buf) is bytes:
            buf = buf.decode(ENCODING)

        # Decode both records of the node, least significant byte first.
        x = [0, 0]
        for i in range(2):
            for j in range(record_length):
                x[i] += ord(buf[record_length * i + j]) << (j * 8)

        # The current address bit selects the second or the first record.
        branch = x[1] if ipnum & (1 << depth) else x[0]
        if branch >= self._databaseSegments:
            return branch
        offset = branch

    raise GeoIPError('Corrupt database')
237
def _get_org(self, ipnum):
    """
    Seek and return organization or ISP name for ipnum.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: org/isp name, or None when the lookup ends exactly at the
        segment count
    @rtype: str
    @raise ValueError: if the data read contains no NUL terminator
    """
    seek_org = self._seek_country(ipnum)
    if seek_org == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments

    # The context manager releases the lock even if the seek or read
    # raises.
    with self._lock:
        self._filehandle.seek(seek_org + read_length, os.SEEK_SET)
        buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH)

    if PY3 and type(buf) is bytes:
        buf = buf.decode(ENCODING)

    # The name is everything up to the first NUL character.
    return buf[:buf.index(chr(0))]
260
def _get_region(self, ipnum):
    """
    Seek and return the region info (dict containing country_code
    and region_name).

    Both values are empty strings when nothing matches.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict containing country_code and region_name
    @rtype: dict
    """
    region = ''
    country_code = ''
    seek_country = self._seek_country(ipnum)

    def get_region_name(offset):
        # Two capital letters: the offset written in base 26, 'A' being 0.
        first, second = divmod(offset, 26)
        return chr(first + 65) + chr(second + 65)

    if self._databaseType == const.REGION_EDITION_REV0:
        seek_region = seek_country - const.STATE_BEGIN_REV0
        if seek_region >= 1000:
            country_code = 'US'
            region = get_region_name(seek_region - 1000)
        else:
            country_code = const.COUNTRY_CODES[seek_region]

    elif self._databaseType == const.REGION_EDITION_REV1:
        seek_region = seek_country - const.STATE_BEGIN_REV1
        if seek_region < const.US_OFFSET:
            pass  # nothing known: keep the empty defaults
        elif seek_region < const.CANADA_OFFSET:
            country_code = 'US'
            region = get_region_name(seek_region - const.US_OFFSET)
        elif seek_region < const.WORLD_OFFSET:
            country_code = 'CA'
            region = get_region_name(seek_region - const.CANADA_OFFSET)
        else:
            index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
            # Bounds check on the index. A membership test would compare
            # the integer against the table's entries, not its positions.
            if 0 <= index < len(const.COUNTRY_CODES):
                country_code = const.COUNTRY_CODES[index]

    elif self._databaseType in const.CITY_EDITIONS:
        # _get_record() returns None for an unknown address; fall back to
        # an empty mapping so the defaults are returned.
        rec = self._get_record(ipnum) or {}
        country_code = rec.get('country_code', '')
        region = rec.get('region_name', '')

    return {'country_code': country_code, 'region_name': region}
307
def _get_record(self, ipnum):
    """
    Populate location dict for converted IP.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict with country_code, country_code3, country_name,
        region_name, city, postal_code, latitude, longitude,
        dma_code, metro_code, area_code, time_zone; or None when the
        lookup ends exactly at the segment count
    @rtype: dict
    """
    seek_country = self._seek_country(ipnum)
    if seek_country == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments

    # The context manager releases the lock even if the seek or read
    # raises.
    with self._lock:
        self._filehandle.seek(seek_country + read_length, os.SEEK_SET)
        buf = self._filehandle.read(const.FULL_RECORD_LENGTH)

    if PY3 and type(buf) is bytes:
        buf = buf.decode(ENCODING)

    def read_string(start):
        # Return (index of the terminating NUL, text before it).
        end = start
        while ord(buf[end]) != 0:
            end += 1
        if end > start:
            return (end, buf[start:end])
        return (end, '')

    def read_uint24(start):
        # Three bytes, least significant first.
        return sum(ord(buf[start + j]) << (j * 8) for j in range(3))

    record = {
        'dma_code': 0,
        'area_code': 0,
        'metro_code': '',
        'postal_code': ''
    }

    # The first byte is the index into the country tables.
    char = ord(buf[0])
    record['country_code'] = const.COUNTRY_CODES[char]
    record['country_code3'] = const.COUNTRY_CODES3[char]
    record['country_name'] = const.COUNTRY_NAMES[char]
    buf_pos = 1

    # Three consecutive NUL-terminated strings follow.
    offset, record['region_name'] = read_string(buf_pos)
    offset, record['city'] = read_string(offset + 1)
    offset, record['postal_code'] = read_string(offset + 1)
    buf_pos = offset + 1

    latitude = read_uint24(buf_pos)
    buf_pos += 3
    longitude = read_uint24(buf_pos)
    buf_pos += 3

    record['latitude'] = (latitude / 10000.0) - 180.0
    record['longitude'] = (longitude / 10000.0) - 180.0

    if self._databaseType == const.CITY_EDITION_REV1:
        if record['country_code'] == 'US':
            # One 24-bit value packs both codes: dma * 1000 + area.
            dmaarea_combo = read_uint24(buf_pos)
            buf_pos += 3
            record['dma_code'], record['area_code'] = divmod(
                dmaarea_combo, 1000)

    if record['dma_code'] in const.DMA_MAP:
        record['metro_code'] = const.DMA_MAP[record['dma_code']]

    params = (record['country_code'], record['region_name'])
    record['time_zone'] = time_zone_by_country_and_region(*params)

    return record
396
def _gethostbyname(self, hostname):
    """
    Resolve a hostname to an IP address string.

    For databases listed in const.IPV6_EDITIONS the name is resolved with
    getaddrinfo restricted to AF_INET6, and an empty string is returned
    when that resolution raises socket.gaierror. For every other database
    socket.gethostbyname is used.

    @param hostname: Hostname
    @type hostname: str
    @return: IP address
    @rtype: str
    """
    if self._databaseType not in const.IPV6_EDITIONS:
        return socket.gethostbyname(hostname)

    try:
        results = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
        # Only the first result is used.
        _family, _socktype, _proto, _canonname, sockaddr = results[0]
        address, _port, _flow, _scope = sockaddr
    except socket.gaierror:
        return ''
    return address
408
def id_by_addr(self, addr):
    """
    Get the country index.
    The index is the key into the country code and country name tables.

    @param addr: The IP address
    @type addr: str
    @return: country index
    @rtype: int
    @raise ValueError: if the address converts to a false value
    @raise GeoIPError: if the database is not a Country database
    """
    ipnum = util.ip2long(addr)
    if not ipnum:
        raise ValueError("Invalid IP address: %s" % addr)

    country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
    if self._databaseType in country_editions:
        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    raise GeoIPError('Invalid database type, expected Country')
430
def country_code_by_addr(self, addr):
    """
    Returns 2-letter country code (e.g. 'US') for specified IP address.
    Use this method if you have a Country, Region, or City database.

    @param addr: IP address
    @type addr: str
    @return: 2-letter country code
    @rtype: str
    @raise GeoIPError: if the database type is unsupported, or if a
        ValueError occurs during the lookup
    """
    try:
        database_type = self._databaseType
        country_editions = (const.COUNTRY_EDITION,
                            const.COUNTRY_EDITION_V6)

        if database_type in country_editions:
            # A colon marks the address as IPv6; the address family must
            # match the database edition.
            is_ipv6 = addr.find(':') >= 0
            if is_ipv6:
                if database_type != const.COUNTRY_EDITION_V6:
                    raise ValueError(
                        'Invalid database type; expected IPv4 address')
            elif database_type != const.COUNTRY_EDITION:
                raise ValueError(
                    'Invalid database type; expected IPv6 address')

            return const.COUNTRY_CODES[self.id_by_addr(addr)]

        if database_type in const.REGION_CITY_EDITIONS:
            return self.region_by_addr(addr)['country_code']

        raise GeoIPError(
            'Invalid database type, expected Country, City or Region')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
463
# NOTE(review): the ``def`` line of this method is missing from the listing;
# the name is reconstructed by analogy with country_code_by_addr -- confirm.
def country_code_by_name(self, hostname):
    """
    Returns 2-letter country code (e.g. 'US') for specified hostname.
    Use this method if you have a Country, Region, or City database.

    The hostname is resolved first and the resulting address is passed
    to country_code_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: 2-letter country code
    @rtype: str
    """
    return self.country_code_by_addr(self._gethostbyname(hostname))
476
def country_name_by_addr(self, addr):
    """
    Returns full country name for specified IP address.
    Use this method if you have a Country or City database.

    @param addr: IP address
    @type addr: str
    @return: country name
    @rtype: str
    @raise GeoIPError: if the database type is unsupported, or if a
        ValueError occurs during the lookup
    """
    try:
        country_editions = (const.COUNTRY_EDITION,
                            const.COUNTRY_EDITION_V6)

        if self._databaseType in country_editions:
            country_id = self.id_by_addr(addr)
            return const.COUNTRY_NAMES[country_id]

        if self._databaseType in const.CITY_EDITIONS:
            record = self.record_by_addr(addr)
            return record['country_name']

        raise GeoIPError('Invalid database type, expected Country or City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
498
# NOTE(review): the ``def`` line of this method is missing from the listing;
# the name is reconstructed by analogy with country_name_by_addr -- confirm.
def country_name_by_name(self, hostname):
    """
    Returns full country name for specified hostname.
    Use this method if you have a Country or City database.

    The hostname is resolved first and the resulting address is passed
    to country_name_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: country name
    @rtype: str
    """
    return self.country_name_by_addr(self._gethostbyname(hostname))
511
def org_by_addr(self, addr):
    """
    Lookup Organization, ISP or ASNum for given IP address.
    Use this method if you have an Organization, ISP or ASNum database.

    @param addr: IP address
    @type addr: str
    @return: organization or ISP name
    @rtype: str
    @raise GeoIPError: if the database type is unsupported, or if a
        ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        org_editions = (const.ORG_EDITION,
                        const.ISP_EDITION,
                        const.ASNUM_EDITION)
        if self._databaseType in org_editions:
            return self._get_org(ipnum)

        raise GeoIPError(
            'Invalid database type, expected Org, ISP or ASNum')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
535
# NOTE(review): the ``def`` line of this method is missing from the listing;
# the name is reconstructed by analogy with org_by_addr -- confirm.
def org_by_name(self, hostname):
    """
    Lookup the organization (or ISP) for hostname.
    Use this method if you have an Organization, ISP or ASNum database.

    The hostname is resolved first and the resulting address is passed
    to org_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Organization or ISP name
    @rtype: str
    """
    return self.org_by_addr(self._gethostbyname(hostname))
548
def record_by_addr(self, addr):
    """
    Look up the record for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary with country_code, country_code3, country_name,
        region_name, city, postal_code, latitude, longitude, dma_code,
        metro_code, area_code, time_zone
    @rtype: dict
    @raise GeoIPError: if the database is not a City database, or if a
        ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.CITY_EDITIONS:
            return self._get_record(ipnum)

        raise GeoIPError('Invalid database type, expected City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
573
# NOTE(review): the ``def`` line of this method is missing from the listing;
# the name is reconstructed by analogy with record_by_addr -- confirm.
def record_by_name(self, hostname):
    """
    Look up the record for a given hostname.
    Use this method if you have a City database.

    The hostname is resolved first and the resulting address is passed
    to record_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Dictionary as returned by record_by_addr
    @rtype: dict
    """
    return self.record_by_addr(self._gethostbyname(hostname))
588
def region_by_addr(self, addr):
    """
    Lookup the region for given IP address.
    Use this method if you have a Region or City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    @raise GeoIPError: if the database is neither a Region nor a City
        database, or if a ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.REGION_CITY_EDITIONS:
            return self._get_region(ipnum)

        raise GeoIPError('Invalid database type, expected Region or City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
611
# NOTE(review): the ``def`` line of this method is missing from the listing;
# the name is reconstructed by analogy with region_by_addr -- confirm.
def region_by_name(self, hostname):
    """
    Lookup the region for given hostname.
    Use this method if you have a Region or City database.

    The hostname is resolved first and the resulting address is passed
    to region_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    """
    return self.region_by_addr(self._gethostbyname(hostname))
624
def time_zone_by_addr(self, addr):
    """
    Look up the time zone for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Time zone, or None when the database holds no record for
        the address
    @rtype: str
    @raise GeoIPError: if the database is not a City database, or if a
        ValueError occurs during the lookup
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType not in const.CITY_EDITIONS:
            message = 'Invalid database type, expected City'
            raise GeoIPError(message)

        # _get_record() returns None for an address without a record;
        # subscripting that would raise TypeError.
        record = self._get_record(ipnum)
        if not record:
            return None
        return record['time_zone']
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
647
# NOTE(review): the ``def`` line of this method is missing from the listing;
# the name is reconstructed by analogy with time_zone_by_addr -- confirm.
def time_zone_by_name(self, hostname):
    """
    Look up the time zone for a given hostname.
    Use this method if you have a City database.

    The hostname is resolved first and the resulting address is passed
    to time_zone_by_addr.

    @param hostname: Hostname
    @type hostname: str
    @return: Time zone
    @rtype: str
    """
    return self.time_zone_by_addr(self._gethostbyname(hostname))
660